In [1]:
import numpy as np
import matplotlib.pyplot as pl
import seaborn as sn
import emcee
import pymilne
%matplotlib inline

This notebook shows how to extract information from a spectral line using the weak-field approximation. It uses a Bayesian approach to the problem.


Simple case

Let us generate an artificial spectral line in the weak-field regime and do the inference.


In [3]:
lambda0 = 6301.5080
JUp = 2.0
JLow = 2.0
gUp = 1.5
gLow = 1.833
lambdaStart = 6300.8
lambdaStep = 0.03
nLambda = 50

lineInfo = np.asarray([lambda0, JUp, JLow, gUp, gLow, lambdaStart, lambdaStep])

s = pymilne.milne(nLambda, lineInfo)


stokes = np.zeros((4,nLambda))

BField = 100.0
BTheta = 20.0
BChi = 0.0
VMac = 2.0
damping = 0.0
B0 = 0.8
B1 = 0.2
mu = 1.0
VDop = 0.085
kl = 5.0
modelSingle = np.asarray([BField, BTheta, BChi, VMac, damping, B0, B1, VDop, kl])
stokes = s.synth(modelSingle,mu)
wl = np.arange(nLambda)*lambdaStep + lambdaStart

In [4]:
f, ax = pl.subplots(ncols=2, nrows=1, figsize=(12,6))
ax = ax.flatten()
labels = ['I/I$_c$','V/I$_c$']
which = [0,3]
for i in range(2):
    ax[i].plot(wl, stokes[which[i],:])
    ax[i].set_xlabel('Wavelength [$\AA$]')
    ax[i].set_ylabel(labels[i])
    ax[i].ticklabel_format(useOffset=False)
pl.tight_layout()


In [5]:
sigma = 1e-4
stI = stokes[0,:] + sigma*np.random.randn(nLambda)
stV = stokes[3,:] + sigma*np.random.randn(nLambda)
diffI = np.gradient(stI, lambdaStep)

Let us check if the line is in the weak-field regime. For this, we plot Stokes V vs. the wavelength derivative of Stokes I and check if it is a straight line.

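In the weak-field regime, circular polarization is proportional to the wavelength derivative of the intensity (this is the standard weak-field relation; the numerical constant is the same one used in the code below):

$$ V(\lambda) \simeq -4.6686\times 10^{-13}\, \bar{g}\, \lambda_0^2\, B_\parallel\, \frac{dI}{d\lambda}, $$

with $\lambda_0$ in Å, $B_\parallel = B\cos\theta_B$ in G, and $\bar{g}$ the effective Landé factor of the line (the code below uses $\bar{g}=1.5$). If the approximation holds, plotting $V$ against $dI/d\lambda$ should therefore give a straight line through the origin.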

In [6]:
pl.plot(diffI, stV)
pl.xlabel('dI/d$\lambda$')
pl.ylabel('V/I$_c$')

We can now do a linear fit and obtain the longitudinal magnetic flux density. Remember that the fit has to be of the form $y=a\,x$ (no intercept). To do the fit, write down the $\chi^2$ and minimize it.

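Explicitly, writing $\chi^2(a) = \sum_i \left(V_i - a\, I_i'\right)^2$ and setting $d\chi^2/da = 0$ gives the estimator computed in the next cell:

$$ a = \frac{\sum_i V_i\, I_i'}{\sum_i I_i'^2}, \qquad B_\parallel = \frac{-a}{4.6686\times 10^{-13}\, \bar{g}\, \lambda_0^2}. $$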

In [8]:
coef = np.sum(stV*diffI)/np.sum(diffI**2)
print coef
print "Original BLOS={0:6.2f} Mx cm-2 - Inferred BLOS={1:6.2f} Mx cm-2".format(BField*np.cos(BTheta*np.pi/180.0),-coef / (4.6686e-13*6301.**2*1.5))


-0.00304243983052
Original BLOS= 93.97 Mx cm-2 - Inferred BLOS=109.43 Mx cm-2

Let us investigate the $\chi^2$ surface a little by sampling the posterior distribution with emcee.

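The quantity we sample is the log-posterior (with flat priors on $B \in [0,500]$ G and $\theta_B \in [0,180]^\circ$, as implemented in the class below):

$$ \ln p(B,\theta_B\,|\,\mathbf{V}) = -\sum_i \frac{\left( V_i + C\, \bar{g}\, \lambda_0^2\, B\cos\theta_B\; I_i' \right)^2}{2\sigma^2} + \mathrm{const}, \qquad C = 4.6686\times 10^{-13}. $$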

In [7]:
class linearFit(object):
    def __init__(self, diffI, V, noise):
        self.diffI = diffI
        self.V = V
        self.noise = noise
        self.lower = np.asarray([0.0, 0.0])
        self.upper = np.asarray([500.0, 180.0])

    def logPosterior(self, theta):
        # theta = [B, thetaB]; uniform priors inside the box defined by lower/upper
        if ( (theta[0] < self.lower[0]) | (theta[0] > self.upper[0]) | (theta[1] < self.lower[1]) | (theta[1] > self.upper[1]) ):
            return -np.inf
        else:
            # weak-field model: V = -C * lambda0^2 * geff * B * cos(thetaB) * dI/dlambda
            model = -4.6686e-13 * 6301.0**2 * 1.5 * theta[0] * np.cos(theta[1] * np.pi/180.0) * self.diffI
            return -np.sum((self.V-model)**2 / (2.0*self.noise**2))

    def sample(self):        
        ndim, nwalkers = 2, 500
        self.theta0 = np.asarray([50.0,30.0])
        p0 = [self.theta0 + 0.01*np.random.randn(ndim) for i in range(nwalkers)]
        self.sampler = emcee.EnsembleSampler(nwalkers, ndim, self.logPosterior)
        self.sampler.run_mcmc(p0, 300)

In [8]:
out = linearFit(diffI, stV, sigma)

In [9]:
out.sample()

In [10]:
BSamples = out.sampler.chain[:,-20:,0].flatten()
thBSamples = out.sampler.chain[:,-20:,1].flatten()

In [119]:
f, ax = pl.subplots(ncols=2, nrows=3, figsize=(12,8))
ax[0,0].plot(BSamples)
ax[0,1].hist(BSamples)
ax[0,0].set_ylabel('B')
ax[0,1].set_xlabel('B')
ax[1,0].plot(thBSamples)
ax[1,1].hist(thBSamples)
ax[1,0].set_ylabel(r'$\theta_B$')
ax[1,1].set_xlabel(r'$\theta_B$')
ax[2,0].plot(BSamples*np.cos(thBSamples*np.pi/180.0))
ax[2,1].hist(BSamples*np.cos(thBSamples*np.pi/180.0))
ax[2,0].set_ylabel(r'B*cos($\theta_B$)')
ax[2,1].set_xlabel(r'B*cos($\theta_B$)')
pl.tight_layout()
pl.savefig('weakSamplingSimple.png')



In [14]:
f, ax = pl.subplots(figsize=(12,10))
ax.hexbin(BSamples,thBSamples)
ax.set_xlabel('B [G]')
ax.set_ylabel(r'$\theta_B$ [deg]')



Biases

The maximum-likelihood solution can be written analytically when the problem is parameterized in terms of $B_\parallel$ and $B_\perp$. It is interesting to analyze the behavior of the $B_\perp$ estimator in the presence of noise. It is given by:

$$ B_\perp^2 = \frac{1}{C^2} \frac{\left[ \left(\sum_i Q_i I_i'' \right)^2+ \left(\sum_i U_i I_i'' \right)^2 \right]^{1/2}} {\sum_i I_i''^2} $$

Imagine the case of pure noise in Stokes $Q$ and $U$.

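A quick sketch of why this estimator is biased: for pure noise of standard deviation $\sigma$, the sums $\sum_i Q_i I_i''$ and $\sum_i U_i I_i''$ are independent zero-mean Gaussians with variance $\sigma^2 \sum_i I_i''^2$, so the modulus in the numerator follows a Rayleigh distribution and is strictly positive. Its expectation gives

$$ E\left[B_\perp^2\right] = \frac{1}{C^2}\, \sqrt{\frac{\pi}{2}}\, \frac{\sigma}{\left(\sum_i I_i''^2\right)^{1/2}} > 0, $$

so noise alone already produces a spurious transverse field.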

In [19]:
stI = stokes[0,:] + sigma*np.random.randn(nLambda)
stQ = sigma*np.random.randn(nLambda)
stU = sigma*np.random.randn(nLambda)
stIp = 6301.0**2*1.5*np.gradient(stI, lambdaStep)
stIpp = 0.25*6301.0**2*1.5*np.gradient(stIp, lambdaStep)

In [23]:
C = 4.67e-13
Bpar = np.zeros(500)
Bperp = np.zeros(500)
for i in range(500):
    stV = stokes[3,:] + sigma*np.random.randn(nLambda)
    stQ = stokes[1,:] + sigma*np.random.randn(nLambda)
    stU = stokes[2,:] + sigma*np.random.randn(nLambda)
    Bpar[i] = -1.0/C * np.sum(stV*stIp) / np.sum(stIp**2)
    Bperp[i] = np.sqrt(1.0/C**2 * np.sqrt( np.sum(stQ*stIpp)**2 + np.sum(stU*stIpp)**2 ) / np.sum(stIpp**2))
f, ax = pl.subplots(ncols=2, nrows=2, figsize=(9,9))
ax[0,0].hist(Bpar)
ax[0,1].hist(Bperp)
ax[1,0].hist(180.0/np.pi*np.arctan2(Bperp, Bpar))   # inferred inclination



Errors in both coordinates

In fact, there is always noise in both Stokes $V$ and Stokes $I$ (typically larger in Stokes $I$, which here enters through its derivative). Therefore, our model fitting has to take this into account.


In [78]:
lambda0 = 6301.5080
JUp = 2.0
JLow = 2.0
gUp = 1.5
gLow = 1.833
lambdaStart = 6300.8
lambdaStep = 0.015
nLambda = 100

lineInfo = np.asarray([lambda0, JUp, JLow, gUp, gLow, lambdaStart, lambdaStep])

s = pymilne.milne(nLambda, lineInfo)


stokes = np.zeros((4,nLambda))

BField = 100.0
BTheta = 20.0
BChi = 0.0
VMac = 2.0
damping = 0.0
B0 = 0.8
B1 = 0.2
mu = 1.0
VDop = 0.085
kl = 5.0
modelSingle = np.asarray([BField, BTheta, BChi, VMac, damping, B0, B1, VDop, kl])
stokes = s.synth(modelSingle,mu)
wl = np.arange(nLambda)*lambdaStep + lambdaStart

stokesNoise = np.copy(stokes)
sigmaI = 5e-3
sigmaV = 1e-3
stokesNoise[0,:] = stokes[0,:] + sigmaI*np.random.randn(nLambda)
stokesNoise[3,:] = stokes[3,:] + sigmaV*np.random.randn(nLambda)
diffI = np.gradient(stokesNoise[0,:], lambdaStep)

In [79]:
f, ax = pl.subplots(ncols=3, nrows=1, figsize=(15,6))
ax = ax.flatten()
labels = ['I/I$_c$','V/I$_c$']
which = [0,3]
for i in range(2):
    ax[i].plot(wl, stokesNoise[which[i],:])
    ax[i].plot(wl, stokes[which[i],:])
    ax[i].set_xlabel('Wavelength [$\AA$]')
    ax[i].set_ylabel(labels[i])
    ax[i].ticklabel_format(useOffset=False)
ax[2].plot(wl, diffI)
ax[2].set_xlabel('Wavelength [$\AA$]')
ax[2].set_ylabel('dI/d$\lambda$')
ax[2].ticklabel_format(useOffset=False)
pl.tight_layout()
sigmaDiffI = np.std(diffI[0:25])
print "Estimated noise in dIdl in units of sigma_I={0}".format(sigmaDiffI / sigmaI)


Estimated noise in dIdl in units of sigma_I=50.9989715506

In [120]:
f, ax = pl.subplots(figsize=(12,6))
ax.errorbar(diffI, stokesNoise[3,:], yerr=sigmaV, xerr=sigmaDiffI, fmt='.')



The exact value of the noise in the derivative depends on how the numerical derivative is carried out. For instance, for a simple forward-difference formula, $I_i' = (I_{i+1}-I_i)/\Delta\lambda$, error propagation gives

$$ \sigma_{I'}^2 = \frac{2}{\Delta \lambda^2}\, \sigma_I^2, $$

while np.gradient uses centered differences in the interior, $I_i' = (I_{i+1}-I_{i-1})/(2\Delta\lambda)$, for which $\sigma_{I'}^2 = \sigma_I^2/(2\Delta\lambda^2)$, i.e. $\sigma_{I'} \approx 47\,\sigma_I$ for $\Delta\lambda = 0.015$ Å, in agreement with the estimate above.

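As a quick numerical check (a minimal sketch, not part of the original analysis), we can differentiate pure noise with np.gradient and compare the measured noise amplification with the centered-difference prediction:

In [ ]:
# sketch: noise amplification of np.gradient applied to pure noise
noiseTest = sigmaI * np.random.randn(100000)
measured = np.std(np.gradient(noiseTest, lambdaStep)) / sigmaI
predicted = 1.0 / (lambdaStep * np.sqrt(2.0))
print "Measured amplification = {0:.1f} - predicted = {1:.1f}".format(measured, predicted)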
First, let us compute the maximum-likelihood solution neglecting errors in x. This is obtained just by:


In [117]:
coef = np.sum(stokesNoise[3,:]*diffI)/np.sum(diffI**2)
print "Estimated field = {0} G".format(coef / (-4.6686e-13*6301.**2*1.5))
print "Real field = {0} G".format(BField*np.cos(BTheta*np.pi/180.0))


Estimated field = 73.1653062299 G
Real field = 93.9692620786 G

Now we sample the distribution taking into account the presence of uncertainties on both axes. Following standard techniques, we assume that our measurement satisfies

$$ y_i = z_i + e_i $$

where $z_i$ is our predictor, characterized by a distribution $p_Z(z_i)$, and $e_i$ is the measurement uncertainty, characterized by the distribution $p_E(e_i)$. In that case, the distribution of $y_i$ is given by

$$ p(y_i) = \int dz_i p_Z(z_i) p_E(y_i-z_i) $$

In our case, we are assuming that $z_i$ is uncertain because the points at which it is evaluated are uncertain. Assume that the uncertainty in $x_i$ is Gaussian, so that

$$ p(x_i) = N(x_{i0},\sigma_x^2) $$

where $x_{i0}$ is the nominal value of $x_i$. In our case we know that $z_i=\alpha x_i$, because the two quantities follow a linear relationship, so $p_Z(z_i)$ follows from a simple change of variables:

$$ p_Z(z_i) = p_X(x_i) \left| \frac{dx_i}{dz_i} \right| = \frac{1}{|\alpha|}\, p_X\!\left( \frac{z_i}{\alpha} \right) $$

Taking this into account, the final likelihood function is

$$ p(y_i) = \frac{1}{\sqrt{2\pi \left(\sigma_y^2 + \alpha^2 \sigma_x^2 \right)}} \exp \left[ -\frac{1}{2} \frac{(y_i-\alpha x_i)^2}{\sigma_y^2 + \alpha^2 \sigma_x^2} \right] $$

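Note that, because the variance now depends on $\alpha$, the Gaussian normalization cannot be dropped when writing the log-likelihood; this is the origin of the logarithmic term in the linearBothAxisFit class below:

$$ \ln p(\mathbf{y}|\alpha) = -\frac{1}{2} \sum_i \ln\left(\sigma_y^2 + \alpha^2 \sigma_x^2\right) - \frac{1}{2} \sum_i \frac{(y_i - \alpha x_i)^2}{\sigma_y^2 + \alpha^2 \sigma_x^2} + \mathrm{const}. $$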
Another equivalent way of seeing this is to include the unobserved true values of the $x$ and $y$ points in the Bayesian problem and marginalize over them at the end:

$$ p(\mathbf{x},\mathbf{y},\alpha|\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}) \propto p(\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}|\mathbf{x},\mathbf{y},\alpha)\, p(\mathbf{x},\mathbf{y},\alpha) $$

$$ p(\alpha|\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}) \propto \int d \mathbf{x}\, d \mathbf{y}\; p(\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}|\mathbf{x},\mathbf{y},\alpha)\, p(\mathbf{x},\mathbf{y},\alpha) $$

Given that we are assuming that $y=\alpha x$, we can remove $\mathbf{y}$ from the previous equations (using a Dirac delta prior) and finally compute

$$ p(\alpha|\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}) \propto \int d \mathbf{x}\; p(\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}|\mathbf{x},\alpha)\, p(\mathbf{x},\alpha) $$

where the likelihood for each point is given by a 2D Gaussian distribution

$$ p(x_\mathrm{obs,i},y_\mathrm{obs,i}|\mathbf{x},\alpha) = \frac{1}{2\pi \sqrt{\sigma_x^2 \sigma_y^2}} \exp \left[ -\frac{1}{2} \left( \frac{(x-x_\mathrm{obs,i})^2}{\sigma_x^2} + \frac{(\alpha x-y_\mathrm{obs,i})^2}{\sigma_y^2} \right) \right] $$

If one computes the integral over $x$, the same result is obtained.

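For completeness, here is a sketch of that integral (assuming a flat prior on each $x_i$): completing the square in $x$ and integrating gives

$$ \int dx\, \frac{1}{2\pi \sigma_x \sigma_y} \exp \left[ -\frac{1}{2} \left( \frac{(x-x_\mathrm{obs,i})^2}{\sigma_x^2} + \frac{(\alpha x-y_\mathrm{obs,i})^2}{\sigma_y^2} \right) \right] = \frac{1}{\sqrt{2\pi \left(\sigma_y^2 + \alpha^2 \sigma_x^2 \right)}} \exp \left[ -\frac{1}{2} \frac{(y_\mathrm{obs,i}-\alpha x_\mathrm{obs,i})^2}{\sigma_y^2 + \alpha^2 \sigma_x^2} \right], $$

which is exactly the per-point likelihood written above and implemented in linearBothAxisFit below.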

In [98]:
class linearBothAxisFit(object):
    def __init__(self, diffI, V, noiseDiffI, noiseV):
        self.diffI = diffI
        self.V = V
        self.noiseDiffI = noiseDiffI
        self.noiseV = noiseV
        self.lower = np.asarray([-0.01])
        self.upper = np.asarray([0.01])

    def logPosterior(self, theta):
        if ( (theta[0] < self.lower[0]) | (theta[0] > self.upper[0]) ):
            return -np.inf
        else:
            model = theta[0] * self.diffI
            # total variance per point: sigma_V^2 + alpha^2 * sigma_dIdl^2
            variance = self.noiseV**2 + theta[0]**2 * self.noiseDiffI**2
            return -0.5*self.V.size*np.log(variance) - 0.5 * np.sum((self.V-model)**2 / variance)

    def sample(self):        
        ndim, nwalkers = 1, 500
        self.theta0 = np.asarray([-0.005])
        p0 = [self.theta0 + 0.001*np.random.randn(ndim) for i in range(nwalkers)]
        self.sampler = emcee.EnsembleSampler(nwalkers, ndim, self.logPosterior)
        self.sampler.run_mcmc(p0, 300)
        
class linearSingleAxisFit(object):
    def __init__(self, diffI, V, noiseV):
        self.diffI = diffI
        self.V = V        
        self.noiseV = noiseV
        self.lower = np.asarray([-0.01])
        self.upper = np.asarray([0.01])

    def logPosterior(self, theta):
        if ( (theta[0] < self.lower[0]) | (theta[0] > self.upper[0]) ):
            return -np.inf
        else:
            model = theta[0] * self.diffI
            variance = self.noiseV**2
            # the log-term is constant here but kept for symmetry with linearBothAxisFit
            return -0.5*self.V.size*np.log(variance) - 0.5 * np.sum((self.V-model)**2 / variance)

    def sample(self):        
        ndim, nwalkers = 1, 500
        self.theta0 = np.asarray([-0.005])
        p0 = [self.theta0 + 0.001*np.random.randn(ndim) for i in range(nwalkers)]
        self.sampler = emcee.EnsembleSampler(nwalkers, ndim, self.logPosterior)
        self.sampler.run_mcmc(p0, 300)

In [105]:
outBoth = linearBothAxisFit(diffI, stokesNoise[3,:], sigmaDiffI, sigmaV)
outBoth.sample()
samplesBoth = outBoth.sampler.chain[:,-20:,0].flatten() / (-4.6686e-13*6301.**2*1.5)

In [106]:
outSingle = linearSingleAxisFit(diffI, stokesNoise[3,:], sigmaV)
outSingle.sample()
samplesSingle = outSingle.sampler.chain[:,-20:,0].flatten() / (-4.6686e-13*6301.**2*1.5)

In [122]:
f, ax = pl.subplots(ncols=2, nrows=1, figsize=(15,6))
ax[0].plot(samplesBoth)
ax[0].plot(samplesSingle)
ax[1].hist(samplesBoth, bins=40, label='xy errors')
ax[1].hist(samplesSingle, bins=40, label='y errors')
ax[0].set_ylabel(r'B$_\parallel$ [G]')
ax[1].set_xlabel(r'B$_\parallel$ [G]')
ax[0].axhline(BField*np.cos(BTheta*np.pi/180.0))
ax[1].axvline(BField*np.cos(BTheta*np.pi/180.0))
pl.legend()


